import os
import requests
import re
from redis import StrictRedis

# Module-level Redis client (localhost:6379, database 0); the crawler uses it
# to remember which share links have already been handled.
db = StrictRedis(
    host="localhost",
    port=6379,
    db=0,
    decode_responses=True,
)


# Crawl logic: scan the saved pages and transfer every share link not seen before
def start(base_path=r"C:\Users\admin1\Desktop\html\html"):
    """Scan saved detail pages for net-disk links and transfer the new ones.

    Every regular file directly inside ``base_path`` is read as UTF-8 text and
    searched for links of the form ``https://t00y.com/file/<uid>-<file_id>``
    or ``https://n802.com/file/<uid>-<file_id>``.  The ``"<uid>-<file_id>"``
    key of each link is added to the Redis set ``"shudan"``; only keys that
    were not already in the set are handed to ``save``.

    Args:
        base_path: Directory holding the locally saved detail pages.

    Raises:
        Whatever ``save`` raises.  The Redis entry for the failing link is
        removed first, so the link is retried on the next run.
    """
    # Number of files transferred during this run
    count = 0

    # Example links:
    #   https://t00y.com/file/16058819-412851136
    #   https://n802.com/file/16058819-392187481
    # Raw string so ``\d`` is a regex escape rather than a string escape, and
    # ``\.`` so that only a literal dot is accepted before "com".
    link_pattern = re.compile(r"https://(?:t00y|n802)\.com/file/(\d+)-(\d+)")

    for file in os.listdir(base_path):
        path = os.path.join(base_path, file)
        # os.listdir also returns sub-directories, which cannot be opened as text
        if not os.path.isfile(path):
            continue

        with open(path, "r", encoding="utf-8") as f:
            text = f.read()
        matches = list(link_pattern.finditer(text))

        if not matches:
            print(f"{file} 没有网盘链接")
            continue

        print(f"{file} 有网盘链接")
        for match in matches:
            url = match.group(0)
            uid, file_id = match.groups()
            print(url, uid, file_id)

            # Incremental crawl: a falsy SADD result means the key was already
            # in the set, i.e. this link was handled before.
            key = f"{uid}-{file_id}"
            if not db.sadd("shudan", key):
                print(f"{url} 已经保存过")
                continue

            try:
                save(uid, file_id)
            except BaseException:
                # The key was recorded before the transfer ran; undo that so a
                # failed or interrupted transfer is not skipped forever.
                db.srem("shudan", key)
                raise
            count += 1
            print(f"第{count}条保存完毕")


# Transfer one shared file into the net disk of the account behind the cookie
def save(uid, file_id):
    """Copy the shared file ``uid``/``file_id`` into the account's root folder.

    Sends a GET to the ``search_file_copy`` endpoint followed by the POST that
    carries the copy form, then prints a success message.

    Args:
        uid: Owner id taken from the share link (the part before the dash).
        file_id: File id taken from the share link (the part after the dash).

    Raises:
        requests.HTTPError: The POST answered with a 4xx/5xx status.
        requests.RequestException: Connection failure or timeout.
    """
    # Without a timeout requests waits forever on a stalled connection
    timeout = 10

    # The template's placeholders must be filled in; previously the literal
    # "{}" text was sent as uid and file_id.
    url1 = "https://webapi.ctfile.com/api.php?item=file_act&action=search_file_copy&uid={}&file_id={}&token=false".format(uid, file_id)
    h1 = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36",
        # Stray spaces removed from both URLs so they are well-formed
        "origin": "https://t00y.com",
        "referer": "https://t00y.com/file/16058819-408028972",
        # NOTE(review): hard-coded session cookie and token - this is a
        # credential committed to source and it will expire; consider loading
        # it from configuration instead.
        "cookie": 'ct_uid=d5fb48d88faafebe7303b04f4ef58990; PHPSESSID=d2a55khmjfiapd4mftn7lu2bf6; ua_checkmutilogin=LQ68djNN7l; pubcookie=A2YENAM6UjBRYgRpAmRWOQBbAjZTD1NwUmpQbAViUjNRdA8_DDNTN1QRVDFRMABoBXlRMgY8ATtYAVN3UjlWOAMyBHEDelJoUWUEaQJsVjsAWwJ2U25Ta1JvUGIFfFJjUTQPPwxAUzdUZ1QzUSgAOAU4UTwGWgFnWD5TMVJtVmEDYgQ2Az1SNVFjBFkCYFY9ADECZFM1U2FSYVA7BT9SNFE2DzsMYlNkVGNUOVE_AGkFZFFkBmABZlhtU2NSYVY3A2EEMgM7UjRRZgQ0; token=84ecdfb5f4570cc61edc3513e45010e9'
    }
    # Preliminary request; its response is not inspected
    requests.get(url1, headers=h1, timeout=timeout)

    post_url = "https://webapi.ctfile.com/api.php?item=file_act&token=false"
    form_data = {
        'action': 'search_file_copy',
        'task': 'search_file_copy',
        'formhash': '',
        'file_id': file_id,
        'uid': uid,
        'folder_path': '/',
    }
    response = requests.post(post_url, data=form_data, headers=h1, timeout=timeout)
    # Do not report success when the server rejected the request
    response.raise_for_status()
    print("转存成功")


# Main function
def main():
    """Run the crawl by delegating to ``start``."""
    start()


# Run the crawl only when the file is executed as a script, not when imported
if __name__ == "__main__":
    main()
